I'm going to overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and I want to test that the training worked.
In [1]:
from pearce.emulator import OriginalRecipe, ExtraCrispy
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [2]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [3]:
%%bash
ls ~/des/Pearce*.hdf5
In [4]:
training_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmo.hdf5'
test_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoTest.hdf5'
em_method = 'gp'
split_method = 'random'
In [5]:
a = 1.0
z = 1.0/a - 1.0
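(With a = 1.0 this is z = 0.0, so the fixed parameters in the next cell pin the emulator to the present-day snapshot and a single r bin.)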
In [6]:
fixed_params = {'z':z, 'r':24.06822623}
In [7]:
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                     downsample_factor=1.0)
#hyperparams = {'n_estimators': 500,
#               'max_depth': 5}
In [8]:
emu.get_param_names()
Out[8]:
In [9]:
import h5py
f = h5py.File(training_file, 'r')
In [10]:
np.array(f['cosmo_no_00/a_1.000/obs']).shape
Out[10]:
In [11]:
emu.x.shape[0] + 12788
Out[11]:
In [12]:
for pname in emu.get_param_names():
    print pname, emu.get_param_bounds(pname)
In [13]:
emu.get_param_names()
Out[13]:
In [14]:
zhongzhu_dict = {'omch2': [0.2661017, 1.8339794], 'ombh2': [0.1054246, 10.6161248], 'ln10As': [1.1295944, 2.2441632],
                 'H0': [0.3643993, 13.8155106],
                 'ns': [0.2408568, 10.6371797], 'Neff': [11.5649985, 11.3512804], 'w0': [5.6407612, 7.342365],
                 'logM0': [4.9071932, 3.1795786],
                 'alpha': [10.6279446, 3.7658774], 'logM1': [11.7621938, 5.0188608], 'sigma_logM': [4.7031938, 4.6846614],
                 'logMmin': [1.0, 1.0],
                 'amp': [-12.0550382, 0.0, -1.5383083], 'r': [0.0, 0.0]}
names = ['amp']
names.extend(emu.get_param_names())
from itertools import cycle
names = cycle(names)
amp_count = 0
v = []
for n in names:
    if n == 'amp':
        amp_count += 1
    v.append(zhongzhu_dict[n][amp_count-1]) #this is a poison hack, don't judge me
    #v.append(zhongzhu_dict[n][amp_count])
    if amp_count == 3:
        break
v = np.array(v)
print v
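The cycling above is easy to misread, so here is a minimal standalone sketch of the same pattern with a hypothetical three-entry dict (toy names and values, not the real hyperparameters): 'amp' holds one amplitude per kernel term, every other key holds one value per non-constant term, and the vector comes out term by term.
from itertools import cycle
# Toy version of the loop above: 'amp' has one value per kernel term,
# every other parameter has one value per non-constant term.
toy = {'amp': [10., 20., 30.], 'p1': [1., 2.], 'p2': [3., 4.]}
toy_names = cycle(['amp', 'p1', 'p2'])
count, vec = 0, []
for n in toy_names:
    if n == 'amp':
        count += 1
    vec.append(toy[n][count - 1])
    if count == 3:
        break
print(vec)  # [10.0, 1.0, 3.0, 20.0, 2.0, 4.0, 30.0]: amp + scales per term, bare amp for the constant term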
In [15]:
emu._emulator.set_parameter_vector(v)
In [16]:
v
Out[16]:
In [17]:
print emu._emulator.kernel
In [18]:
emu.scale_bin_centers
Out[18]:
In [19]:
emu.n_bins
Out[19]:
In [20]:
#print emu.x.shape
#print emu.downsample_x.shape
if hasattr(emu, "_emulators"):
    print emu._emulators[0]._x.shape
else:
    print emu._emulator._x.shape
In [21]:
emu._ordered_params
Out[21]:
In [22]:
from sklearn.model_selection import train_test_split
In [23]:
x, y, yerr = emu.x, emu.y, emu.yerr
downsample_idxs = np.random.choice(x.shape[0], size = int(0.08*x.shape[0]), replace = False)
x,y, yerr = x[downsample_idxs, :], y[downsample_idxs], yerr[downsample_idxs]
train_x, test_x, train_y, test_y, train_yerr, test_yerr = train_test_split(x, y, yerr, test_size = 0.1)
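A quick shape check, just numpy on the arrays above, confirms the 8% downsample and the 90/10 split:
print(train_x.shape, test_x.shape)  # rows should be ~90% and ~10% of the 8% subsample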
In [24]:
model = emu._emulator
model.compute(train_x, train_yerr)
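Assuming emu._emulator is a george.GP (the set_parameter_vector and kernel calls above point that way, but that's a guess at pearce internals), compute factorizes the kernel matrix at the training inputs, and the conditioned log-likelihood is a cheap sanity check:
# Assumes a george.GP: compute() above factorized K(train_x, train_x) + diag(train_yerr**2)
print(model.log_likelihood(train_y))  # should be finite if the factorization succeeded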
In [25]:
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean
In [26]:
np.mean(np.abs((pred_y-test_y)/test_y))
Out[26]:
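The mean fractional error can hide a heavy tail; a hedged follow-up on the same arrays is to look at percentiles too:
frac_err = np.abs((pred_y - test_y) / test_y)
print(np.percentile(frac_err, [50, 68, 95]))  # median and tail of the fractional error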
In [20]:
model = emu._emulator
ypred = model.predict(emu.y, emu.x, False, False, False)*emu._y_std+emu._y_mean
In [21]:
resids = np.abs(emu.y*emu._y_std+emu._y_mean - ypred)
In [22]:
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))
Out[22]:
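Note this is an in-sample number: the GP is being asked to reproduce the points it was conditioned on, so it should come out noticeably better than the held-out fraction above; if it doesn't, something is off with the hyperparameters or the noise term.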
In [23]:
ypred.mean(), emu._y_mean
Out[23]:
In [24]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()
In [25]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()
In [26]:
plt.hist(np.log10(test_gof));
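A labeled version of the same histogram is easier to come back to later; a minimal sketch:
plt.hist(np.log10(test_gof))
plt.xlabel('log10(fractional error)')
plt.ylabel('count')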
In [27]:
test_x, test_y, test_yerr, _ = emu.get_data(test_file,fixed_params, None)
In [28]:
test_x
Out[28]:
In [29]:
(emu.x*emu._x_std) + emu._x_mean
Out[29]:
In [30]:
emu.get_param_names()
Out[30]:
In [31]:
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)
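Because the whitening reuses the training-set mean and std, the whitened test targets should be close to, but not exactly, zero mean and unit variance; worth a quick check:
# Whitened with the *training* statistics, so expect roughly 0 and 1, not exactly
print(test_y_white.mean(), test_y_white.std())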
In [32]:
model = emu._emulator
In [33]:
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)
In [34]:
pred_y = pred_y_white*emu._y_std + emu._y_mean
In [35]:
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')
plt.legend(loc = 'best')
Out[35]:
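Overlaid curves can look deceptively good, so here's a sketch of the pointwise residual for the same slice:
plt.plot(pred_y[:100] - test_y[:100])
plt.axhline(0, color='k')
plt.ylabel('pred - truth')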
In [36]:
test_y.mean(), emu._y_mean, pred_y.mean()
Out[36]:
In [37]:
test_y.std(), emu._y_std, pred_y.std()
Out[37]:
In [38]:
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')
Out[38]:
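As a numerical companion to the overlapping histograms, a two-sample KS test on the whitened arrays (plain scipy, nothing pearce-specific) checks whether the two distributions agree in shape and not just in mean and width:
from scipy.stats import ks_2samp
# Small statistic / large p-value => predicted and true whitened y's are distributed alike
print(ks_2samp(pred_y_white, test_y_white))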